package factory;

import config.CrawlerConfig;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;

/**
 * @Description: Factory that builds and starts crawler4j CrawlController
 *               instances for a single seed URL.
 * @Author: Max Woods
 * @Date: 2018/6/15 13:58
 */

public class UrlCrawlControllerFactory {
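
    /**
     * Builds a CrawlController for the given seed URL and runs a
     * single-threaded crawl with the supplied WebCrawler subclass.
     *
     * <p>A minimal usage sketch, assuming this project's CrawlerPage extends
     * WebCrawler and that /data/crawl/root is writable:
     *
     * <pre>{@code
     * CrawlController controller =
     *         UrlCrawlControllerFactory.getCrawlerPageController(
     *                 "https://example.com", CrawlerPage.class);
     * // The crawl has already completed at this point; any collected results
     * // can be read back, e.g. via controller.getCrawlersLocalData().
     * }</pre>
     *
     * @param url   the seed URL the crawl starts from
     * @param clazz the WebCrawler subclass that processes fetched pages
     * @return the controller, after the blocking crawl has finished
     * @throws Exception if crawler4j fails to initialize (e.g. the storage folder)
     */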

    public static CrawlController getCrawlerPageController(String url, Class<? extends WebCrawler> clazz) throws Exception {
        String crawlStorageFolder = "/data/crawl/root";
        int numberOfCrawlers = 1;

        // Limit crawl depth and point crawler4j at its intermediate-storage folder.
        CrawlConfig config = new CrawlConfig();
        config.setMaxDepthOfCrawling(3);
        config.setCrawlStorageFolder(crawlStorageFolder);
        config.setUserAgentString(CrawlerConfig.agent);

        // One PageFetcher and one RobotstxtServer are shared by all crawler threads.
        PageFetcher pageFetcher = new PageFetcher(config);
        RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
        RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);

        CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
        controller.addSeed(url);

        // start(...) blocks until the crawl completes; use startNonBlocking(...)
        // instead if the caller needs the controller while the crawl is running.
        controller.start(clazz, numberOfCrawlers);
        return controller;
    }

}